In [75]:
# Imports — one block at the top: stdlib, third-party, then sklearn.
# Fixes: removed the duplicate `import pandas as pd`; added the
# DecisionTreeClassifier / RandomForestClassifier imports that later cells
# use but the notebook never imported (NameError under Restart & Run All).
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import model_selection
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier        # was missing; used below
from sklearn.ensemble import RandomForestClassifier    # was missing; used below
from sklearn.metrics import confusion_matrix, classification_report, recall_score, precision_score, f1_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, PowerTransformer

pt = PowerTransformer()
sc = StandardScaler()
warnings.filterwarnings('ignore')

Load the dataset

It is always good practice to eyeball the raw data to get a feel for it in terms of

the structure of the file, the number of attributes, the types of attributes, and a

general idea of likely challenges in the dataset.

In [21]:
df=pd.read_csv("C:/Users/HP/Downloads/parkinsons.data")
In [15]:
df.head()
Out[15]:
name MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer ... Shimmer:DDA NHR HNR status RPDE DFA spread1 spread2 D2 PPE
0 phon_R01_S01_1 119.992 157.302 74.997 0.00784 0.00007 0.00370 0.00554 0.01109 0.04374 ... 0.06545 0.02211 21.033 1 0.414783 0.815285 -4.813031 0.266482 2.301442 0.284654
1 phon_R01_S01_2 122.400 148.650 113.819 0.00968 0.00008 0.00465 0.00696 0.01394 0.06134 ... 0.09403 0.01929 19.085 1 0.458359 0.819521 -4.075192 0.335590 2.486855 0.368674
2 phon_R01_S01_3 116.682 131.111 111.555 0.01050 0.00009 0.00544 0.00781 0.01633 0.05233 ... 0.08270 0.01309 20.651 1 0.429895 0.825288 -4.443179 0.311173 2.342259 0.332634
3 phon_R01_S01_4 116.676 137.871 111.366 0.00997 0.00009 0.00502 0.00698 0.01505 0.05492 ... 0.08771 0.01353 20.644 1 0.434969 0.819235 -4.117501 0.334147 2.405554 0.368975
4 phon_R01_S01_5 116.014 141.781 110.655 0.01284 0.00011 0.00655 0.00908 0.01966 0.06425 ... 0.10470 0.01767 19.649 1 0.417356 0.823484 -3.747787 0.234513 2.332180 0.410335

5 rows × 24 columns

In [33]:
df.describe().transpose()
Out[33]:
count mean std min 25% 50% 75% max
MDVP:Fo(Hz) 195.0 154.228641 41.390065 88.333000 117.572000 148.790000 182.769000 260.105000
MDVP:Fhi(Hz) 195.0 197.104918 91.491548 102.145000 134.862500 175.829000 224.205500 592.030000
MDVP:Flo(Hz) 195.0 116.324631 43.521413 65.476000 84.291000 104.315000 140.018500 239.170000
MDVP:Jitter(%) 195.0 0.006220 0.004848 0.001680 0.003460 0.004940 0.007365 0.033160
MDVP:Jitter(Abs) 195.0 0.000044 0.000035 0.000007 0.000020 0.000030 0.000060 0.000260
MDVP:RAP 195.0 0.003306 0.002968 0.000680 0.001660 0.002500 0.003835 0.021440
MDVP:PPQ 195.0 0.003446 0.002759 0.000920 0.001860 0.002690 0.003955 0.019580
Jitter:DDP 195.0 0.009920 0.008903 0.002040 0.004985 0.007490 0.011505 0.064330
MDVP:Shimmer 195.0 0.029709 0.018857 0.009540 0.016505 0.022970 0.037885 0.119080
MDVP:Shimmer(dB) 195.0 0.282251 0.194877 0.085000 0.148500 0.221000 0.350000 1.302000
Shimmer:APQ3 195.0 0.015664 0.010153 0.004550 0.008245 0.012790 0.020265 0.056470
Shimmer:APQ5 195.0 0.017878 0.012024 0.005700 0.009580 0.013470 0.022380 0.079400
MDVP:APQ 195.0 0.024081 0.016947 0.007190 0.013080 0.018260 0.029400 0.137780
Shimmer:DDA 195.0 0.046993 0.030459 0.013640 0.024735 0.038360 0.060795 0.169420
NHR 195.0 0.024847 0.040418 0.000650 0.005925 0.011660 0.025640 0.314820
HNR 195.0 21.885974 4.425764 8.441000 19.198000 22.085000 25.075500 33.047000
status 195.0 0.753846 0.431878 0.000000 1.000000 1.000000 1.000000 1.000000
RPDE 195.0 0.498536 0.103942 0.256570 0.421306 0.495954 0.587562 0.685151
DFA 195.0 0.718099 0.055336 0.574282 0.674758 0.722254 0.761881 0.825288
spread1 195.0 -5.684397 1.090208 -7.964984 -6.450096 -5.720868 -5.046192 -2.434031
spread2 195.0 0.226510 0.083406 0.006274 0.174351 0.218885 0.279234 0.450493
D2 195.0 2.381826 0.382799 1.423287 2.099125 2.361532 2.636456 3.671155
PPE 195.0 0.206552 0.090119 0.044539 0.137451 0.194052 0.252980 0.527367

Use univariate & bivariate analysis to check the individual attributes for their basic

statistics, such as central values, spread, tails, etc. What are your observations?

In [26]:
df.drop('name',axis=1,inplace=True)
In [39]:
sns.pairplot(df,diag_kind='kde')
Out[39]:
<seaborn.axisgrid.PairGrid at 0x29ec5057a88>
In [35]:
df.isnull().sum()
Out[35]:
MDVP:Fo(Hz)         0
MDVP:Fhi(Hz)        0
MDVP:Flo(Hz)        0
MDVP:Jitter(%)      0
MDVP:Jitter(Abs)    0
MDVP:RAP            0
MDVP:PPQ            0
Jitter:DDP          0
MDVP:Shimmer        0
MDVP:Shimmer(dB)    0
Shimmer:APQ3        0
Shimmer:APQ5        0
MDVP:APQ            0
Shimmer:DDA         0
NHR                 0
HNR                 0
status              0
RPDE                0
DFA                 0
spread1             0
spread2             0
D2                  0
PPE                 0
dtype: int64
In [38]:
df.dtypes
Out[38]:
MDVP:Fo(Hz)         float64
MDVP:Fhi(Hz)        float64
MDVP:Flo(Hz)        float64
MDVP:Jitter(%)      float64
MDVP:Jitter(Abs)    float64
MDVP:RAP            float64
MDVP:PPQ            float64
Jitter:DDP          float64
MDVP:Shimmer        float64
MDVP:Shimmer(dB)    float64
Shimmer:APQ3        float64
Shimmer:APQ5        float64
MDVP:APQ            float64
Shimmer:DDA         float64
NHR                 float64
HNR                 float64
status                int64
RPDE                float64
DFA                 float64
spread1             float64
spread2             float64
D2                  float64
PPE                 float64
dtype: object
In [40]:
df.skew(axis = 0, skipna = True) 
Out[40]:
MDVP:Fo(Hz)         0.591737
MDVP:Fhi(Hz)        2.542146
MDVP:Flo(Hz)        1.217350
MDVP:Jitter(%)      3.084946
MDVP:Jitter(Abs)    2.649071
MDVP:RAP            3.360708
MDVP:PPQ            3.073892
Jitter:DDP          3.362058
MDVP:Shimmer        1.666480
MDVP:Shimmer(dB)    1.999389
Shimmer:APQ3        1.580576
Shimmer:APQ5        1.798697
MDVP:APQ            2.618047
Shimmer:DDA         1.580618
NHR                 4.220709
HNR                -0.514317
status             -1.187727
RPDE               -0.143402
DFA                -0.033214
spread1             0.432139
spread2             0.144430
D2                  0.430384
PPE                 0.797491
dtype: float64
In [41]:
# seaborn is already imported at the top of the notebook — the duplicate
# `import seaborn as sns` was removed.
# Colour the pair plot by the target to see which features separate classes.
sns.pairplot(df, diag_kind='kde', hue='status')
Out[41]:
<seaborn.axisgrid.PairGrid at 0x29edbdd6408>
In [50]:
# Correlation heatmap of all features.
# Fix: correlations range over [-1, 1] and this matrix has many negative
# entries (e.g. HNR vs. the jitter/shimmer measures, see the table below);
# the original vmin=0 clipped every negative correlation to the bottom of
# the diverging colour scale, making it unreadable.
corr = df.corr()
ax = sns.heatmap(
    corr,
    vmin=-1, vmax=1,
    cmap=sns.diverging_palette(20, 250, n=200),
    square=True
)
ax.set_xticklabels(
    ax.get_xticklabels(),
    rotation=90,
    horizontalalignment='right'
);
In [51]:
df.corr(method ='pearson')
Out[51]:
MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer MDVP:Shimmer(dB) ... Shimmer:DDA NHR HNR status RPDE DFA spread1 spread2 D2 PPE
MDVP:Fo(Hz) 1.000000 0.400985 0.596546 -0.118003 -0.382027 -0.076194 -0.112165 -0.076213 -0.098374 -0.073742 ... -0.094732 -0.021981 0.059144 -0.383535 -0.383894 -0.446013 -0.413738 -0.249450 0.177980 -0.372356
MDVP:Fhi(Hz) 0.400985 1.000000 0.084951 0.102086 -0.029198 0.097177 0.091126 0.097150 0.002281 0.043465 ... -0.003733 0.163766 -0.024893 -0.166136 -0.112404 -0.343097 -0.076658 -0.002954 0.176323 -0.069543
MDVP:Flo(Hz) 0.596546 0.084951 1.000000 -0.139919 -0.277815 -0.100519 -0.095828 -0.100488 -0.144543 -0.119089 ... -0.150737 -0.108670 0.210851 -0.380200 -0.400143 -0.050406 -0.394857 -0.243829 -0.100629 -0.340071
MDVP:Jitter(%) -0.118003 0.102086 -0.139919 1.000000 0.935714 0.990276 0.974256 0.990276 0.769063 0.804289 ... 0.746635 0.906959 -0.728165 0.278220 0.360673 0.098572 0.693577 0.385123 0.433434 0.721543
MDVP:Jitter(Abs) -0.382027 -0.029198 -0.277815 0.935714 1.000000 0.922911 0.897778 0.922913 0.703322 0.716601 ... 0.697170 0.834972 -0.656810 0.338653 0.441839 0.175036 0.735779 0.388543 0.310694 0.748162
MDVP:RAP -0.076194 0.097177 -0.100519 0.990276 0.922911 1.000000 0.957317 1.000000 0.759581 0.790652 ... 0.744919 0.919521 -0.721543 0.266668 0.342140 0.064083 0.648328 0.324407 0.426605 0.670999
MDVP:PPQ -0.112165 0.091126 -0.095828 0.974256 0.897778 0.957317 1.000000 0.957319 0.797826 0.839239 ... 0.763592 0.844604 -0.731510 0.288698 0.333274 0.196301 0.716489 0.407605 0.412524 0.769647
Jitter:DDP -0.076213 0.097150 -0.100488 0.990276 0.922913 1.000000 0.957319 1.000000 0.759555 0.790621 ... 0.744901 0.919548 -0.721494 0.266646 0.342079 0.064026 0.648328 0.324377 0.426556 0.671005
MDVP:Shimmer -0.098374 0.002281 -0.144543 0.769063 0.703322 0.759581 0.797826 0.759555 1.000000 0.987258 ... 0.987626 0.722194 -0.835271 0.367430 0.447424 0.159954 0.654734 0.452025 0.507088 0.693771
MDVP:Shimmer(dB) -0.073742 0.043465 -0.119089 0.804289 0.716601 0.790652 0.839239 0.790621 0.987258 1.000000 ... 0.963202 0.744477 -0.827805 0.350697 0.410684 0.165157 0.652547 0.454314 0.512233 0.695058
Shimmer:APQ3 -0.094717 -0.003743 -0.150747 0.746625 0.697153 0.744912 0.763580 0.744894 0.987625 0.963198 ... 1.000000 0.716207 -0.827123 0.347617 0.435242 0.151124 0.610967 0.402243 0.467265 0.645377
Shimmer:APQ5 -0.070682 -0.009997 -0.101095 0.725561 0.648961 0.709927 0.786780 0.709907 0.982835 0.973751 ... 0.960072 0.658080 -0.813753 0.351148 0.399903 0.213873 0.646809 0.457195 0.502174 0.702456
MDVP:APQ -0.077774 0.004937 -0.107293 0.758255 0.648793 0.737455 0.804139 0.737439 0.950083 0.960977 ... 0.896647 0.694019 -0.800407 0.364316 0.451379 0.157276 0.673158 0.502188 0.536869 0.721694
Shimmer:DDA -0.094732 -0.003733 -0.150737 0.746635 0.697170 0.744919 0.763592 0.744901 0.987626 0.963202 ... 1.000000 0.716215 -0.827130 0.347608 0.435237 0.151132 0.610971 0.402223 0.467261 0.645389
NHR -0.021981 0.163766 -0.108670 0.906959 0.834972 0.919521 0.844604 0.919548 0.722194 0.744477 ... 0.716215 1.000000 -0.714072 0.189429 0.370890 -0.131882 0.540865 0.318099 0.470949 0.552591
HNR 0.059144 -0.024893 0.210851 -0.728165 -0.656810 -0.721543 -0.731510 -0.721494 -0.835271 -0.827805 ... -0.827130 -0.714072 1.000000 -0.361515 -0.598736 -0.008665 -0.673210 -0.431564 -0.601401 -0.692876
status -0.383535 -0.166136 -0.380200 0.278220 0.338653 0.266668 0.288698 0.266646 0.367430 0.350697 ... 0.347608 0.189429 -0.361515 1.000000 0.308567 0.231739 0.564838 0.454842 0.340232 0.531039
RPDE -0.383894 -0.112404 -0.400143 0.360673 0.441839 0.342140 0.333274 0.342079 0.447424 0.410684 ... 0.435237 0.370890 -0.598736 0.308567 1.000000 -0.110950 0.591117 0.479905 0.236931 0.545886
DFA -0.446013 -0.343097 -0.050406 0.098572 0.175036 0.064083 0.196301 0.064026 0.159954 0.165157 ... 0.151132 -0.131882 -0.008665 0.231739 -0.110950 1.000000 0.195668 0.166548 -0.165381 0.270445
spread1 -0.413738 -0.076658 -0.394857 0.693577 0.735779 0.648328 0.716489 0.648328 0.654734 0.652547 ... 0.610971 0.540865 -0.673210 0.564838 0.591117 0.195668 1.000000 0.652358 0.495123 0.962435
spread2 -0.249450 -0.002954 -0.243829 0.385123 0.388543 0.324407 0.407605 0.324377 0.452025 0.454314 ... 0.402223 0.318099 -0.431564 0.454842 0.479905 0.166548 0.652358 1.000000 0.523532 0.644711
D2 0.177980 0.176323 -0.100629 0.433434 0.310694 0.426605 0.412524 0.426556 0.507088 0.512233 ... 0.467261 0.470949 -0.601401 0.340232 0.236931 -0.165381 0.495123 0.523532 1.000000 0.480585
PPE -0.372356 -0.069543 -0.340071 0.721543 0.748162 0.670999 0.769647 0.671005 0.693771 0.695058 ... 0.645389 0.552591 -0.692876 0.531039 0.545886 0.270445 0.962435 0.644711 0.480585 1.000000

23 rows × 23 columns

Split the dataset into training and test sets in the ratio 70:30 (Training:Test).

Create the model using the "entropy" criterion (information gain) and fit it to the

training data.

Test the model on the test data — what accuracy is achieved? Capture the predicted

values and do a crosstab.

In [52]:
# Features / target split: everything except `status` predicts `status`.
X = df.drop(columns='status')
y = df['status']
In [143]:
dt_model.fit(train_set, train_labels)
Out[143]:
DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=42, splitter='best')
In [72]:
# Baseline decision tree: 70:30 split, entropy criterion, fixed seed.
train_set, test_set, train_labels, test_labels = train_test_split(
    X, y, test_size=0.30, random_state=1
)
dt_model = DecisionTreeClassifier(criterion='entropy', random_state=42)
dt_model.fit(train_set, train_labels)

# Training accuracy of 1.0 with a lower test accuracy indicates overfitting.
print("TrainingScore : {0} ".format(dt_model.score(train_set, train_labels)))
print("TestingScore :  {0}".format(dt_model.score(test_set, test_labels)))
TrainingScore : 1.0 
TestingScore :  0.864406779661017
In [84]:
dt.feature_importances_
Out[84]:
array([0.1550357 , 0.08974917, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.04970723, 0.02796418, 0.17685274, 0.        , 0.        ,
       0.03584669, 0.02987245, 0.04600149, 0.        , 0.        ,
       0.05823158, 0.33073875])
In [92]:
# Fix: use the fitted `dt_model` (`dt` is undefined at this point in a
# fresh kernel — same naming defect as the feature-importances cell).
print(dt_model.predict(test_set))
dt_model.predict_proba(test_set)
[1 1 1 0 1 1 1 1 0 1 1 0 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 0
 1 0 0 1 1 1 0 1 1 1 1 1 1 0 0 1 0 0 0 0 0 1]
Out[92]:
array([[0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.]])

Use the regularization parameters max_depth and min_samples_leaf to recreate the model.

What is the impact on the model accuracy? How does regularization help?

In [144]:
# Sweep max_depth over 1..99, averaging train/test accuracy across 20 random
# 70:30 splits, to see where the tree starts to overfit.
# Fixes: removed the duplicate matplotlib/numpy imports (already imported at
# the top) and renamed the misleading `neighbors` (a KNN leftover) to `depths`.
depths = np.arange(1, 100)
train_accuracy_plot = np.empty(len(depths))
test_accuracy_plot = np.empty(len(depths))

for i, depth in enumerate(depths):
    train = []
    test = []
    for j in range(20):
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.30, random_state=j
        )
        # NOTE(review): standardisation does not affect decision-tree splits;
        # kept only to mirror the original pipeline.
        sc = StandardScaler()
        scaledX_train = sc.fit_transform(X_train)
        scaledX_test = sc.transform(X_test)
        dt = DecisionTreeClassifier(max_depth=depth)
        dt.fit(scaledX_train, y_train)
        train.append(dt.score(scaledX_train, y_train))
        test.append(dt.score(scaledX_test, y_test))
    # Mean accuracy over the 20 resamples for this depth.
    train_accuracy_plot[i] = np.mean(train)
    test_accuracy_plot[i] = np.mean(test)

plt.title('Decision Tree: Varying Depth')
plt.plot(depths, test_accuracy_plot, label='Testing Accuracy')
plt.plot(depths, train_accuracy_plot, label='Training Accuracy')
plt.legend()
plt.xlabel('Max Depth')
plt.ylabel('Accuracy')
plt.show()
In [145]:
# Sweep min_samples_split over 2..99 (the parameter must be >= 2), averaging
# train/test accuracy across 20 random 70:30 splits.
neighbors = np.arange(2, 100)
train_accuracy_plot = np.empty(len(neighbors))
test_accuracy_plot = np.empty(len(neighbors))

for i, k in enumerate(neighbors):
    train = []
    test = []
    for j in range(20):
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.30, random_state=j
        )
        sc = StandardScaler()
        scaledX_train = sc.fit_transform(X_train)
        scaledX_test = sc.transform(X_test)
        dt = DecisionTreeClassifier(min_samples_split=k)
        dt.fit(scaledX_train, y_train)
        train.append(dt.score(scaledX_train, y_train))
        test.append(dt.score(scaledX_test, y_test))
    # Mean accuracy over the 20 resamples for this split threshold.
    train_accuracy_plot[i] = np.mean(train)
    test_accuracy_plot[i] = np.mean(test)

# Fix: the title said 'Varying Depth', but this sweep varies min_samples_split.
plt.title('Decision Tree: Varying min_samples_split')
plt.plot(neighbors, test_accuracy_plot, label='Testing Accuracy')
plt.plot(neighbors, train_accuracy_plot, label='Training Accuracy')
plt.legend()
plt.xlabel('min_samples_split')
plt.ylabel('Accuracy')
plt.show()
In [194]:
# Regularised tree: min_samples_split=8 / max_depth=50 chosen from the sweeps.
# Fix: the original fitted a PowerTransformer but then trained and scored on
# the UNSCALED X_train/X_test, so the transformer was dead code — removed.
# (The scores printed below were produced from the raw features.)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=5)
dt_model = DecisionTreeClassifier(criterion='entropy', random_state=1,
                                  min_samples_split=8, max_depth=50)
dt_model.fit(X_train, y_train)
print("TrainingScore : {0} ".format(dt_model.score(X_train, y_train)))
print("TestingScore :  {0}".format(dt_model.score(X_test, y_test)))
TrainingScore : 0.9779411764705882 
TestingScore :  0.864406779661017

AdaBoostClassifier

In [179]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with 100 weak learners on the same 70:30 split.
# Fix: the PowerTransformer output was never used (the model was fitted on
# raw X_train) — the dead scaling code was removed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=5)
abcl = AdaBoostClassifier(n_estimators=100)
abcl = abcl.fit(X_train, y_train)
print("Training Score")
print(abcl.score(X_train, y_train))
print("Testing Score")
print(abcl.score(X_test, y_test))
Training Score
1.0
Testing Score
0.8813559322033898

GradientBoostingClassifier

In [180]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting: 200 estimators with a conservative 0.05 learning rate.
# Fix: the PowerTransformer output was never used (the model was fitted on
# raw X_train) — the dead scaling code was removed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=5)
gbcl = GradientBoostingClassifier(n_estimators=200, learning_rate=0.05)
gbcl = gbcl.fit(X_train, y_train)
print("Training Score")
print(gbcl.score(X_train, y_train))
print("Testing Score")
print(gbcl.score(X_test, y_test))
Training Score
1.0
Testing Score
0.9322033898305084

Next, implement the decision tree using a Random Forest. What is the optimal number of

trees that gives the best result?

In [164]:
# Random forest: sweep the number of trees (1, 21, 41, ... 281), averaging
# accuracy over 20 random 70:30 splits.
# Fixes: the accuracy arrays were sized with len(neighbors) — a variable
# leaked from earlier cells — instead of len(estimators), and the plot used
# `neighbors` as the x-axis, so the curves showed uninitialised np.empty
# values against the wrong axis.
estimators = np.arange(1, 300, 20)
train_accuracy_plot = np.empty(len(estimators))
test_accuracy_plot = np.empty(len(estimators))

for i, n_trees in enumerate(estimators):
    train = []
    test = []
    for j in range(20):
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.30, random_state=j
        )
        sc = StandardScaler()
        scaledX_train = sc.fit_transform(X_train)
        scaledX_test = sc.transform(X_test)
        rfcl = RandomForestClassifier(n_estimators=n_trees, criterion='entropy')
        rfcl.fit(scaledX_train, y_train)
        train.append(rfcl.score(scaledX_train, y_train))
        test.append(rfcl.score(scaledX_test, y_test))
    # Mean accuracy over the 20 resamples for this forest size.
    train_accuracy_plot[i] = np.mean(train)
    test_accuracy_plot[i] = np.mean(test)

plt.title('Random Forest: Optimal Number of Trees')
plt.plot(estimators, test_accuracy_plot, label='Testing Accuracy')
plt.plot(estimators, train_accuracy_plot, label='Training Accuracy')
plt.legend()
plt.xlabel('Number of Trees')
plt.ylabel('Accuracy')
plt.show()
In [193]:
# Final model: a 200-tree entropy random forest on standardised features.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=3)

scaler = StandardScaler()
scaledX_train = scaler.fit_transform(X_train)
scaledX_test = scaler.transform(X_test)

rfcl = RandomForestClassifier(n_estimators=200, criterion='entropy')
rfcl.fit(scaledX_train, y_train)

print("Training Score")
print(rfcl.score(scaledX_train, y_train))
print("Testing Score")
print(rfcl.score(scaledX_test, y_test))
Training Score
1.0
Testing Score
0.9152542372881356
In [ ]:
 
In [ ]: